home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.mactech.com 2010
/
ftp.mactech.com.tar
/
ftp.mactech.com
/
machack
/
Hacks97
/
NewsTicker.sit
/
NewsTicker
/
source code
/
Extractors
/
CNNExtractor.cp
< prev
next >
Wrap
Text File
|
1997-06-19
|
4KB
|
150 lines
/*------------------------------------------------------------------------------
#
# NewsTicker, my Hack for 1997
#
# CNNExtractor.cp - Derived from HTMLExtractor; we are passed the tokens
# and try to recognize headlines in them. We parse
# the page "www.cnn.com", the news page of CNN.
#
------------------------------------------------------------------------------*/
#include "TickerGlobals.h"
#include "CNNExtractor.h"
#include "HTMLExtractor.h"
// Refresh every 20 minutes
// gCNNNextTime holds the time at which the next refresh is due: the
// CNNExtractor constructor sets it to "now + kCNNPeriod" and MustReloadCNN
// compares it with the current GetDateTime value.
long gCNNNextTime = 0;
// Refresh period added to the current time (1200 = 20 minutes in seconds)
#define kCNNPeriod 1200
// Address handed to the HTMLExtractor constructor
#define kCNNAddress "www.cnn.com"
//
// CNNExtractor - HTMLExtractor subclass that recognizes headlines in the
// token stream handed to HandleToken, using a small state machine.
//
class CNNExtractor : public HTMLExtractor
{
protected:
    // States of the headline recognizer
    enum CNNParser
    {
        kncParsing,         // idle: waiting for <H2> or <A ...>
        kncHasLink,         // saw a link: text headlines are <a>headline</a>
        kncHasHeader,       // saw <H2>: some big stories are <h2>text</h2><other><a>
        kncHasNotHeader     // saw </H2>: waiting for the <A ...> that follows
    };

    CNNParser   mfCurrentState;     // current recognizer state
    Str255      mfTheURL;           // URL of the headline being assembled
    Str255      mfTheSubject;       // headline text (length byte in [0], text from [1])

public:
    CNNExtractor(sMyDataPtr theDataPtr);
    virtual ~CNNExtractor(void) {}

    // Called with each token; isCommand distinguishes tags from plain text
    virtual void HandleToken(char* string, short numchars, Boolean isCommand);
};
//
// We just parse the entries to find the element
//
//
// Constructor - sets up the base extractor for the CNN address, starts the
// recognizer in its idle state and schedules the next refresh.
//
CNNExtractor::CNNExtractor(sMyDataPtr theDataPtr)
    : HTMLExtractor(kCNNAddress, 1003, theDataPtr),
      mfCurrentState(kncParsing)        // nothing recognized yet
{
    unsigned long currentTime;

    GetDateTime(&currentTime);

    // The next refresh is due one period (20 minutes) from now
    gCNNNextTime = currentTime + kCNNPeriod;
}
//
// HandleToken - feeds one token into the headline recognizer.
//
//   string     the token text (for a command, the tag starting at '<')
//   numchars   number of characters in the token
//   isCommand  true for a tag, false for plain text
//
// Two page layouts are recognized:
//   <a href=...>headline</a>                 -> entry added when the text arrives
//   <h2>headline</h2> ...other tags... <a >  -> entry added when the link arrives
//
// NOTE(review): MyCompareStr is assumed to return true when the token starts
// with the given tag text, and ParseGoodURL to return true when it stored a
// usable URL in its second argument - confirm against their definitions.
//
void CNNExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
{
    if (isCommand)
    {
        switch (mfCurrentState)
        {
            case kncParsing:        //from nothing, we want H2 or A
                if (MyCompareStr(string, "<H2>"))
                {
                    mfCurrentState = kncHasHeader;
                    mfTheSubject[0] = 0;
                    mfTheURL[0] = 0;
                }
                else if (MyCompareStr(string, "<A "))
                {
                    //string+2 skips the leading "<A" of the tag
                    if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
                    {
                        mfCurrentState = kncHasLink;
                        mfTheSubject[0] = 0;
                    }
                    //a bad URL simply leaves us in kncParsing
                }
                break;

            case kncHasLink:        //another tag before the link text: abort
                mfCurrentState = kncParsing;
                break;

            case kncHasHeader:      //only </H2> keeps the headline alive
                if (MyCompareStr(string, "</H2>"))
                    mfCurrentState = kncHasNotHeader;
                else
                    mfCurrentState = kncParsing;
                break;

            case kncHasNotHeader:   //other tags are skipped until the <A ...>
                if (MyCompareStr(string, "<A "))
                {
                    if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
                        AddEntry(mfTheSubject, mfTheURL);
                    //good URL or not, this headline is finished
                    mfCurrentState = kncParsing;
                }
                break;

            default:                //no other states exist; nothing to do
                break;
        }
    }
    else
    {
        if ((mfCurrentState==kncHasHeader)      //OK, we got a headline!
          ||(mfCurrentState==kncHasLink))
        {
            //Clamp to what the subject buffer can hold. A negative count
            //would otherwise become a bogus length byte and a negative
            //byte count for BlockMove.
            if (numchars<0)
                numchars = 0;
            else if (numchars>255)
                numchars = 255;

            mfTheSubject[0] = numchars;
            BlockMove(string, &mfTheSubject[1], numchars);

            //skip some extraneous CNN stuff
            if (EqualString(mfTheSubject, "\pIMPACT", false, false)
             || EqualString(mfTheSubject, "\pF U L L S T O R Y", false, false)
             || EqualString(mfTheSubject, "\pTEXT - ONLY VERSION", false, false))
                mfCurrentState = kncParsing;

            if (mfCurrentState==kncHasLink)
            {
                //Link text is the headline: add the entry right away.
                //(In kncHasHeader we keep waiting for </H2> and the link.)
                AddEntry(mfTheSubject, mfTheURL);
                mfCurrentState = kncParsing;
            }
        }
    }
}
//
// LoadCNN - builds a temporary CNNExtractor, lets it read its entries,
// disposes of it and then calls InitCursor.
//
void LoadCNN(sMyDataPtr gGlobalsPtr)
{
    CNNExtractor* cnnReader = new CNNExtractor(gGlobalsPtr);

    cnnReader->ReadEntries();
    delete cnnReader;

    // NOTE(review): presumably the read changes the cursor - confirm
    InitCursor();
}
//
// MustReloadCNN - reports whether the refresh time stored in gCNNNextTime
// has been reached. It does not reload anything itself, and gGlobalsPtr is
// not used.
//
Boolean MustReloadCNN(sMyDataPtr gGlobalsPtr)
{
    unsigned long currentTime;

    GetDateTime(&currentTime);

    // Due as soon as the current time is no longer before the scheduled time
    return (currentTime >= gCNNNextTime) ? true : false;
}